import scrapy
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule


class KsySpider(CrawlSpider):
    """Spider that walks paginated listing pages on www.heao.gov.cn.

    The crawl starts at the single URL in ``start_urls``. Links selected by
    the one entry in ``rules`` are passed to :meth:`parse_item`, which for
    now only prints a running counter; no items are produced yet.
    """

    name = 'ksy'
    start_urls = ['http://www.heao.gov.cn/xinwenzixun/']

    # No ``allowed_domains`` is configured for this spider; an earlier
    # placeholder value (['www']) was left disabled.

    # Counter printed by ``parse_item``; starts at 1 and grows by one on
    # every callback invocation.
    count = 1

    rules = (
        Rule(
            # Only links matching the "list_<digits>.shtml" pattern qualify.
            LinkExtractor(allow=r'list_\d+\.shtml'),
            callback='parse_item',
            follow=True,
        ),
    )

    def parse_item(self, response):
        """Print the current callback counter, then advance it by one.

        ``response`` is not inspected yet and nothing is returned.
        """
        current = self.count
        print(current)
        self.count = current + 1

        # Item extraction is not implemented. An earlier sketch populated:
        #   domain_id   <- //input[@id="sid"]/@value
        #   name        <- //div[@id="name"]
        #   description <- //div[@id="description"]
        # and returned the resulting item.
